data1=read.csv("test1.csv",encoding = 'UTF-8')
fm=lm(prec~lati,data=data1)
x=fm$model[,1]
y=fm$model[,2]
fm$model
prec <dbl> | lati <dbl> | |||
---|---|---|---|---|
1 | 48.25 | 40.5 | ||
2 | 193.72 | 36.6 | ||
3 | 413.94 | 35.5 | ||
4 | 358.60 | 37.5 | ||
5 | 615.04 | 35.4 | ||
6 | 752.42 | 33.8 | ||
7 | 435.43 | 35.6 | ||
8 | 238.55 | 36.6 | ||
9 | 87.85 | 39.8 | ||
10 | 316.00 | 36.1 |
cor(x,y)
## [1] -0.9041104
cor.test(x,y)
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -15.11, df = 51, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.9437700 -0.8387982
## sample estimates:
## cor
## -0.9041104
fm$coefficients
## (Intercept) lati
## 3392.17587 -82.09156
所以纬度与降水量线性回归模型为
fm1=lm(prec~lati+long+alti+evap,data=data1)
fm1$coefficients
## (Intercept) lati long alti evap
## 1333.42267932 -55.43028189 10.86386521 0.04060350 -0.06360033
coef.sd<-function(fm){
b=fm$coefficients
si=apply(fm$model,2,sd)
bi=b[-1]*si[-1]/si[1];bi
}
coef.sd(fm1)
## lati long alti evap
## -0.6104780 0.1652505 0.1150597 -0.1655769
summary(fm1)
##
## Call:
## lm(formula = prec ~ lati + long + alti + evap, data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -191.98 -58.43 -10.53 50.50 176.12
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1333.42268 1043.47232 1.278 0.2074
## lati -55.43028 12.57465 -4.408 5.85e-05 ***
## long 10.86387 7.11629 1.527 0.1334
## alti 0.04060 0.02256 1.800 0.0781 .
## evap -0.06360 0.04857 -1.309 0.1967
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 88.23 on 48 degrees of freedom
## Multiple R-squared: 0.8444, Adjusted R-squared: 0.8314
## F-statistic: 65.11 on 4 and 48 DF, p-value: < 2.2e-16
summary(fm1)$r.sq
## [1] 0.8443856
library(leaps)
varselect=regsubsets(prec~lati+long+alti+evap,data=data1)
result=summary(varselect)
data.frame(result$outmat,RSS=result$rss,R2=result$rsq)
lati <chr> | long <chr> | alti <chr> | evap <chr> | RSS <dbl> | R2 <dbl> | |
---|---|---|---|---|---|---|
1 ( 1 ) | * | 438411.4 | 0.8174156 | |||
2 ( 1 ) | * | * | 405246.7 | 0.8312276 | ||
3 ( 1 ) | * | * | * | 386997.8 | 0.8388277 | |
4 ( 1 ) | * | * | * | * | 373652.5 | 0.8443856 |
data.frame(result$outmat,adjR2=result$adjr2,Cp=result$cp,BIC=result$bic)
lati <chr> | long <chr> | alti <chr> | evap <chr> | adjR2 <dbl> | Cp <dbl> | BIC <dbl> | |
---|---|---|---|---|---|---|---|
1 ( 1 ) | * | 0.8138355 | 7.319027 | -82.18817 | |||
2 ( 1 ) | * | * | 0.8244767 | 5.058643 | -82.38694 | ||
3 ( 1 ) | * | * | * | 0.8289600 | 4.714355 | -80.85874 | |
4 ( 1 ) | * | * | * | * | 0.8314177 | 5.000000 | -78.74836 |
#调整决定系数越大越好,Cp和对于的变量个数p越接近越好,BIC越小越好(是残差平方和的调整修正)
fm.step=step(fm1,direction = "forward")#向前引入法
## Start: AIC=479.62
## prec ~ lati + long + alti + evap
fm.step=step(fm1,direction = "backward")
## Start: AIC=479.62
## prec ~ lati + long + alti + evap
##
## Df Sum of Sq RSS AIC
## - evap 1 13345 386998 479.48
## <none> 373653 479.62
## - long 1 18142 391795 480.13
## - alti 1 25226 398878 481.08
## - lati 1 151262 524914 495.64
##
## Step: AIC=479.48
## prec ~ lati + long + alti
##
## Df Sum of Sq RSS AIC
## <none> 386998 479.48
## - long 1 26976 413974 481.05
## - alti 1 41219 428217 482.85
## - lati 1 369472 756470 513.00
fm.step=step(fm1,direction = "both")#逐步筛选法
## Start: AIC=479.62
## prec ~ lati + long + alti + evap
##
## Df Sum of Sq RSS AIC
## - evap 1 13345 386998 479.48
## <none> 373653 479.62
## - long 1 18142 391795 480.13
## - alti 1 25226 398878 481.08
## - lati 1 151262 524914 495.64
##
## Step: AIC=479.48
## prec ~ lati + long + alti
##
## Df Sum of Sq RSS AIC
## <none> 386998 479.48
## + evap 1 13345 373653 479.62
## - long 1 26976 413974 481.05
## - alti 1 41219 428217 482.85
## - lati 1 369472 756470 513.00
summary(fm.step)
##
## Call:
## lm(formula = prec ~ lati + long + alti, data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -209.90 -53.49 -13.85 49.06 181.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1386.96962 1050.24347 1.321 0.1928
## lati -66.07773 9.66096 -6.840 1.18e-08 ***
## long 12.92067 6.99115 1.848 0.0706 .
## alti 0.04949 0.02167 2.285 0.0267 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 88.87 on 49 degrees of freedom
## Multiple R-squared: 0.8388, Adjusted R-squared: 0.829
## F-statistic: 85.01 on 3 and 49 DF, p-value: < 2.2e-16